shock.date = as.Date('2020-05-01') # date when policy shock is implemented
sim.end.date = as.Date('2050-12-01')
stp.den = stp.densities.2018 # use 2018 spud-to-prod densities

well.classes = dimnames(lm.regs)
base.spuds = array(NA, sapply(well.classes, length),
                   dimnames=well.classes)

for (wt in c('Oil', 'Gas')) {
  for (f in 1:2) {
    for (o in 1:2) {
      base.spuds[wt,f,o] = spudcounts[year(spud_month)==year(sim.start.date) & 
                                        prod_type==wt
                                      & fed.land==f-1
                                      & offshore==o-1, mean(N)]
    }
  }
}
# Add adjustment for the fact that we're missing spud dates
# for a fair number of 2018 wells.
# Among wells coming online in 2019 (many of which must have been drilled in 2018),
# we're missing spud date 5-10% of the time, indicating base.spuds
# will be a 5-10% undercount. Scale up to match the implied # of wells not missing.
# load(data.dir%&%'/Working_Data/dt_2020-05-08.RData')
share.with.spud = dt[year(first_prod_month)==year(sim.start.date)+1, mean(!is.na(spud_date)), by=.(prod_type, fed.land, offshore)]
share.with.spud = dcast(share.with.spud, . ~ prod_type + fed.land + offshore, value.var='V1')
share.with.spud[, '.' := NULL]

well.classes = dimnames(base.spuds)
well.class.grid = expand.grid(well.classes)
well.class.string = apply(as.matrix(well.class.grid), 1, paste, collapse='_')

setcolorder(share.with.spud, c(c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_0',
                               c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_1'))
setnames(share.with.spud, c( apply(expand.grid(well.classes), 1, paste, collapse='_')))
share.with.spud.vec = as.numeric(as.matrix(share.with.spud, ncol=1))
names(share.with.spud.vec) = names(share.with.spud)
share.with.spud = share.with.spud.vec
rm(share.with.spud.vec)

base.spuds.adj = array(NA, sapply(well.classes, length),
                       dimnames=well.classes)
# Adjust base spuds
for (wt in c('Oil', 'Gas')) {
  for (f in c('Nonfederal','Federal')) {
    for (o in c('Onshore','Offshore')) {
      base.spuds.adj[wt,f,o] = base.spuds[wt,f,o]/share.with.spud[paste(wt,f,o,sep='_')]
    }
  }
}
base.spuds.adj

# Baseline royalty rates
roy.base = array(NA, sapply(well.classes, length),
                 dimnames=well.classes)
roy.base[,'Nonfederal',] = 0.18
roy.base[,'Federal',] = 0.125
roy.base[,'Federal','Offshore'] = 0.1875 # for deepwater, vast majority of offshore

# Test with changes to fees/royalties to ensure it's working correctly in fed/nonfed cases
carb.add.fed = 10; carb.add.nonfed = 0; # in $/tCO2e
oil.emis.rate.fed = 0.43; oil.emis.rate.nonfed = 0.43; # tCO2/bbl
# gas.emis.rate.fed = 0.0549; gas.emis.rate.nonfed = 0.0549;
gas.emis.rate.fed = (117+28.55)/2204.62; gas.emis.rate.nonfed = (117+28.55)/2204.62 # tCO2e/mcf, including 2.36% methane leakage at 100 year GWP=34 (Raimi email on 5/4/20, 10:35am)
roy.fed.new=.1875

# Test with no changes to fees/royalties to see what happens in the baseline
carb.add.fed = 0; carb.add.nonfed = 0; # in $/tCO2e
roy.fed.new = 0.125

spudcounts.class = dcast(spudcounts, spud_month ~ prod_type+fed.land+offshore, value.var = 'N')
setcolorder(spudcounts.class, c('spud_month',c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_0',
                                c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_1'))
setnames(spudcounts.class, c('date', apply(expand.grid(well.classes), 1, paste, collapse='_')))

# Adjust spudcounts according to missing values.
# Note that since this is a constant scaling, rerunning the drilling regressions
# with this adjusted dataset would have no effect. It wouldn't even change the constant,
# since the regression is in log differences, so this scaling would net out.
spudcounts.class = data.table(spudcounts.class[,'date'],
                              spudcounts.class[,-'date']/share.with.spud)

# Set up production profiles
profiles.use = profiles.2009.plus

prof.idx = CJ(WellAge=profiles.use[, unique(WellAge)],
              fed.land=profiles.use[, unique(fed.land)],
              offshore=profiles.use[, unique(offshore)],
              prod_type=profiles.use[, unique(prod_type)])

profiles.use = merge(profiles.use, prof.idx, 
                     # by=c('prod_type','fed.land','WellAge'), all.y=TRUE)
                     by=c('prod_type','fed.land','offshore','WellAge'), all.y=TRUE)
# Set up oil profiles
profiles.liq.class = dcast(profiles.use, WellAge ~ prod_type + fed.land + offshore, 
                           value.var = c('liq.daily.mean'))
profiles.liq.class
setcolorder(profiles.liq.class, c('WellAge', c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_0',
                                  c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_1'))
setnames(profiles.liq.class, c('WellAge', apply(expand.grid(well.classes), 1, paste, collapse='_')))

# Set up gas profiles
profiles.gas.class = dcast(profiles.use, WellAge ~ prod_type + fed.land + offshore, 
                           value.var = c('gas.daily.mean'))
profiles.gas.class
setcolorder(profiles.gas.class,  c('WellAge', c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_0',
                                   c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_1'))
setnames(profiles.gas.class, c('WellAge', apply(expand.grid(well.classes), 1, paste, collapse='_')))

# Replace NA values with 0
ind <- which(sapply(profiles.liq.class, is.numeric))
for(j in ind){
  set(profiles.liq.class, i = which(is.na(profiles.liq.class[[j]])), j = j, value = 0)
}
ind <- which(sapply(profiles.gas.class, is.numeric))
for(j in ind){
  set(profiles.gas.class, i = which(is.na(profiles.gas.class[[j]])), j = j, value = 0)
}
profiles.liq.class
profiles.gas.class

# Cut off at 10 years
# This consistent  with using only pre-sim data.
# elapsed_months(sim.start.date, as.Date('2009-01-01'))+1
profiles.liq.class = profiles.liq.class[WellAge<=12*10-1]
profiles.gas.class = profiles.gas.class[WellAge<=12*10-1]

# Convert to as % of IP
profiles.liq.class = as.matrix(profiles.liq.class[, -'WellAge'])
profiles.gas.class = as.matrix(profiles.gas.class[, -'WellAge'])

profiles.pct.liq.class = profiles.liq.class/rep.row(profiles.liq.class[2,], nrow(profiles.liq.class))
profiles.pct.gas.class = profiles.gas.class/rep.row(profiles.gas.class[2,], nrow(profiles.gas.class))

# Fit arps curve to project production for years 10-30
fit.arps = function(data) {
  library(data.table)
  data = data.table(q=data, WellAge=1:length(data))
  library(minpack.lm)
  nlsLM(q ~ qi/((1+b*a*WellAge)^(1/b)),
        data=data,
        start=list(qi=max(data[,q],na.rm=TRUE), a=0.1, b=1),
        control=nls.control(warnOnly=TRUE))
}
 
project.arps = function(data, proj.period=1:360) {
  arps.temp = fit.arps(data)
  arps.pred = predict(arps.temp, newdata=data.table(WellAge=proj.period))
  arps.pred[is.na(arps.pred) | arps.pred<0] = 0 # if any go negative or NA, set to 0
  return(arps.pred)
}

project.arps.tail = function(data, proj.start=61, proj.end = 360) {
  # data should be a vector of production by month
  fitting.data = data[2:(proj.start-1)] # Start with 2 (first full month)
  # Predict production for years 11-30
  arps.pred = project.arps(fitting.data,
                           proj.period = proj.start:proj.end)
  # Stack historical data with projected
  combined.q = c(data[1:(proj.start-1)], arps.pred)
  return(combined.q)
  
}

# Project using the first 5 years. 5 years because data gets noisy and biased at the 10 year mark (few wells 
# since at that point it's really just wells drilled in early 2019)
# Stack data on first 5 years with projections beyond that.
profiles.pct.liq.class = apply(profiles.pct.liq.class, 2, project.arps.tail)
profiles.pct.gas.class = apply(profiles.pct.gas.class, 2, project.arps.tail)

#### Prepare IP values
# Use 2019 IPs, assumed to be constant. Compare to EIA projections.
ip.by.type.year = dt[, .(.N, liq.ip = mean(liq_prac_ip_daily), 
                         gas.ip = mean(gas_prac_ip_daily)),
                     by=.(fed.land, prod_type, offshore, year(first_prod_month))]
ip.by.type.year = ip.by.type.year[order(fed.land, prod_type, offshore, year)]

ip.base.year = year(sim.start.date)+1
ip.liq.class = dcast(ip.by.type.year[year==ip.base.year], # year after simulation start
                     year ~ prod_type + fed.land + offshore, value.var=c('liq.ip'))
ip.liq.class = ip.liq.class[,-'year']
setcolorder(ip.liq.class, c(c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_0',
                            c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_1'))
setnames(ip.liq.class, apply(expand.grid(well.classes), 1, paste, collapse='_'))
ip.liq.class = as.matrix(ip.liq.class)

ip.gas.class = dcast(ip.by.type.year[year==ip.base.year],
                     year ~ prod_type + fed.land + offshore, value.var=c('gas.ip'))
ip.gas.class = ip.gas.class[,-'year']
setcolorder(ip.gas.class, c(c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_0',
                            c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_1'))
setnames(ip.gas.class, apply(expand.grid(well.classes), 1, paste, collapse='_'))
ip.gas.class = as.matrix(ip.gas.class)

profiles.liq.class.scaled = profiles.pct.liq.class*rep.row(ip.liq.class, nrow(profiles.pct.liq.class))
profiles.gas.class.scaled = profiles.pct.gas.class*rep.row(ip.gas.class, nrow(profiles.pct.gas.class))

# Add a for loop here to loop over onshore/offshore
# Plot average profiles 2009-2020
for (o in c('Onshore','Offshore')) {
  pdf(output.dir%&%'/Production_profiles_'%&%o%&%'_as_pct_of_IP_'%&%today()%&%'.pdf', family='serif', width=14, height=8)
  par(mfrow=c(1,2))
  xgrid = seq(0, nrow(profiles.liq.class.scaled), by=12*5)
  plot((profiles.pct.liq.class[, "Oil_Nonfederal_"%&%o]), type='l', col=my.cols[1], lwd=2, xaxt='n',
       xlab='Months Since Initial Production', ylab='Oil Produced Per Day (barrels per day)', ylim=c(0,1.2),
       cex=1.5, cex.lab=1.5, cex.axis=1.5)
  axis(1, at=xgrid, cex.lab=1.5, cex.axis=1.5); abline(h=0)
  # axis(1, at=max(xgrid), cex.lab=1.5, cex.axis=1.5)
  grid(col='gray70', nx=NA, ny=NULL)
  abline(v=xgrid, lty=3, col='gray70')
  lines((profiles.pct.liq.class[, "Oil_Federal_"%&%o]), type='l', col=my.cols[2], lwd=2)
  lines((profiles.pct.liq.class[, "Gas_Nonfederal_"%&%o]), type='l', col=my.cols[3], lwd=2)
  lines((profiles.pct.liq.class[, "Gas_Federal_"%&%o]), type='l', col=my.cols[4], lwd=2)
  legend('topright', legend=c('Oil Wells, Nonfederal', 'Oil Wells, Federal', 
                              'Gas Wells, Nonfederal', 'Gas Wells, Federal'), 
         col=my.cols, lty=1, lwd=2, cex=1.5, bg='white')
  
  plot((profiles.pct.gas.class[, "Oil_Nonfederal_"%&%o]), type='l', col=my.cols[1], lwd=2, xaxt='n',
       xlab='Months Since Initial Production', ylab='Gas Produced Per Day (mcf per day)', ylim=c(0,1.2),
       cex=1.5, cex.lab=1.5, cex.axis=1.5)
  axis(1, at=xgrid, cex.lab=1.5, cex.axis=1.5); abline(h=0)
  # axis(1, at=max(xgrid), cex.lab=1.5, cex.axis=1.5)
  grid(col='gray70', nx=NA, ny=NULL)
  abline(v=xgrid, lty=3, col='gray70')
  lines((profiles.pct.gas.class[, "Oil_Federal_"%&%o]), type='l', col=my.cols[2], lwd=2)
  lines((profiles.pct.gas.class[, "Gas_Nonfederal_"%&%o]), type='l', col=my.cols[3], lwd=2)
  lines((profiles.pct.gas.class[, "Gas_Federal_"%&%o]), type='l', col=my.cols[4], lwd=2)
  dev.off()
  
  # Plot profiles used
  pdf(output.dir%&%'/Production_profiles_'%&%o%&%'_scaled_to_'%&%ip.base.year%&%'_IP_'%&%today()%&%'.pdf', 
      family='serif', width=14, height=8)
  par(mfrow=c(1,2))
  xgrid = seq(0, nrow(profiles.liq.class.scaled), by=12*5)
  plot((profiles.liq.class.scaled[, "Oil_Nonfederal_"%&%o]), type='n', col=my.cols[1], lwd=2, xaxt='n',
       xlab='Months Since Initial Production', ylab='Oil Produced Per Day (barrels per day)', 
       # ylim=c(0, max(profiles.liq.class.scaled[,grep(o, colnames(profiles.liq.class.scaled))])),
       cex=1.5, cex.lab=1.5, cex.axis=1.5, ylim=c(0, ifelse(o=='Onshore', 700, 3000)))
  axis(1, at=xgrid, cex.lab=1.5, cex.axis=1.5); abline(h=0)
  # axis(1, at=max(xgrid), cex.lab=1.5, cex.axis=1.5)
  grid(col='gray70', nx=NA, ny=NULL)
  abline(v=xgrid, lty=3, col='gray70')
  lines((profiles.liq.class.scaled[, "Oil_Federal_"%&%o]), type='l', col=my.cols[2], lwd=2)
  lines((profiles.liq.class.scaled[, "Oil_Nonfederal_"%&%o]), type='l', col=my.cols[1], lwd=2)
  lines((profiles.liq.class.scaled[, "Gas_Nonfederal_"%&%o]), type='l', col=my.cols[3], lwd=2)
  lines((profiles.liq.class.scaled[, "Gas_Federal_"%&%o]), type='l', col=my.cols[4], lwd=2)
  legend('topright', legend=c('Oil Wells, Nonfederal', 'Oil Wells, Federal', 
                              'Gas Wells, Nonfederal', 'Gas Wells, Federal'), 
         col=my.cols, lty=1, lwd=2, cex=1.5, bg='white')
  
  plot((profiles.gas.class.scaled[, "Oil_Nonfederal_"%&%o]), type='l', col=my.cols[1], lwd=2, xaxt='n',
       xlab='Months Since Initial Production', ylab='Gas Produced Per Day (mcf per day)',
       # ylim=c(0, max(profiles.gas.class.scaled[,grep(o, colnames(profiles.gas.class.scaled))])),
       cex=1.5, cex.lab=1.5, cex.axis=1.5, ylim=c(0, ifelse(o=='Onshore', 8000, 5000)))
  axis(1, at=xgrid, cex.lab=1.5, cex.axis=1.5); abline(h=0)
  axis(1, at=max(xgrid), cex.lab=1.5, cex.axis=1.5)
  grid(col='gray70', nx=NA, ny=NULL)
  abline(v=xgrid, lty=3, col='gray70')
  lines((profiles.gas.class.scaled[, "Oil_Federal_"%&%o]), type='l', col=my.cols[2], lwd=2)
  lines((profiles.gas.class.scaled[, "Gas_Nonfederal_"%&%o]), type='l', col=my.cols[3], lwd=2)
  lines((profiles.gas.class.scaled[, "Gas_Federal_"%&%o]), type='l', col=my.cols[4], lwd=2)
  dev.off()
}


### Pull in demand and foreign supply forecasts, make demand and foreign supply functions ----
## Demand
iea.dir = data.dir%&%'/IEA/'
library(openxlsx)
weo.projections = data.table(read.xlsx(iea.dir%&%'/Formatted WEO Oil and Gas Projections.xlsx',
                                       sheet = 'WEO_projections_for_output', startRow = 1,
                                       na.strings = 'NA'))
setnames(weo.projections, tolower(names(weo.projections)))

# Note: convert gas production data from bcm/year to bcf/d
bcf.per.bcm = 35.31467 # http://www.kylesconverter.com/volume/billion-cubic-feet-to-cubic-meters
bcfd.per.bcmy = bcf.per.bcm/365.25

weo.projections[, gas_prod_us_bcfd := gas_prod_us_bcm*bcfd.per.bcmy]
weo.projections[, gas_prod_non_us_bcfd := gas_prod_non_us_bcm*bcfd.per.bcmy]
weo.projections[, us_gas_cons_bcfd := us_gas_cons_bcm*bcfd.per.bcmy]
weo.projections[, world_gas_cons_bcfd := world_gas_cons_bcm*bcfd.per.bcmy]
weo.projections[, c('gas_prod_us_bcm','gas_prod_non_us_bcm','us_gas_cons_bcm','world_gas_cons_bcm') := NULL]

weo.projections[, row_crude_elasticity:=as.numeric(row_crude_elasticity)]
weo.projections[, row_gas_elasticity:=as.numeric(row_gas_elasticity)]

weo.projections[year>year(sim.start.date), mean(row_crude_elasticity), by=year]
weo.projections[year>year(sim.start.date), mean(row_gas_elasticity), by=year]
weo.projections[year>year(sim.start.date), mean(row_crude_elasticity)]
weo.projections[year>year(sim.start.date), mean(row_gas_elasticity)]

# Convert WEO prices & spreads from 2018$ to 2020
weo.projections[, iea_crude_price := iea_crude_price*cpi.base/cpi[year(DATE)==2018, mean(CPIAUCSL)] ]
weo.projections[, gas_price_us := gas_price_us*cpi.base/cpi[year(DATE)==2018, mean(CPIAUCSL)] ]
weo.projections[, eu_us_gas_spread := eu_us_gas_spread*cpi.base/cpi[year(DATE)==2018, mean(CPIAUCSL)] ]
weo.projections[, china_us_gas_spread :=  china_us_gas_spread*cpi.base/cpi[year(DATE)==2018, mean(CPIAUCSL)] ]
weo.projections[, japan_us_gas_spread := japan_us_gas_spread*cpi.base/cpi[year(DATE)==2018, mean(CPIAUCSL)] ]
weo.projections[, japan_us_gas_spread := NULL ]
## Use the approx() function on the log of all of these values to interpolate/extrapolate
## for every year from 2018 through 2050.
library(Hmisc)

extrap.dates = seq.Date(from=as.Date('2018-01-01'),
                        to=sim.end.date, by='months')
extrap.dates = seq.Date(from=min(price.path[, date]),
                        to=max(price.path[, date]), by='months')
weo.projections[, year.numer := year+0.5]

extrap.dates.numer = seq(year(min(extrap.dates)) + (month(min(extrap.dates))-1)/12,
                         year(max(extrap.dates)) + (month(max(extrap.dates))-1)/12, 
                         by=1/12) # 2018 = Jan 1 2018... 2018 + 11/12 = Dec 1 2018
dim(weo.projections)
weo.proj.interp = sapply(weo.projections[,-c('scenario','year','year.numer')], function(y) exp(approxExtrap(y=log(y), x=weo.projections[,year.numer], xout=extrap.dates.numer)$y))
weo.projections = data.table(year.numer=extrap.dates.numer, weo.proj.interp)
weo.projections[, year:=floor(year.numer)]
weo.projections[, date:=as.Date(paste(year, round(1 + (year.numer-year)*12, 0), 1, sep='-'))]

# Bring in WTI/Brent/HH futures prices as baseline price path
weo.projections = merge(weo.projections, price.path[, .(date, wti.future=wti, brent.future=brent, hh.future=hh)], by='date',
                        all.x=TRUE, all.y=FALSE)
# These futures strips will be the BASELINE price path. 

# Long-run oil demand elasticity
lr.oil.dem.elasts = c(Huntington = -0.15, 
                      Balke = -0.51,
                      Dahl = mean(c(-0.13,-0.26)),
                      Cooper = mean(c(-0.18, -0.45)),
                      HausmanNewey = -0.8,
                      Kilian = -0.26) # note: Kilian is an IMPACT elasticity. 
lr.gas.dem.elasts = c(HausmanKellogg = mean(c(-0.57, -0.47, -0.20, -0.23)),
                      AuffRubin = mean(c(-0.23, -0.17)),
                      Arora = -0.24,
                      NewellRaimi = -0.1)

# US adjustment for Drillinginfo's understatement of EIA reported production
# Read in EIA crude and marketed gas production data.
# Calculate US.adjustment.factors as the ratio to 2019 actuals to 2019 modeled
eia.crude = fread(data.dir%&%'/EIA/U.S._crude_oil_production.csv', skip=4)
eia.crude[, Month:=as.Date(Month)]

eia.gas = fread(data.dir%&%'/EIA/U.S._Natural_Gas_Gross_Withdrawals.csv', skip=4)
eia.gas[, Month:=as.Date(Month)]

eia.crude.2018 = eia.crude[year(Month)==2018, mean(`U.S. Crude Oil (Thousand Barrels per Day)  thousand barrels per day`)/1e3]
eia.gas.2018 = eia.gas[year(Month)==2018, mean(`U.S. Natural Gas Gross Withdrawals  Million Cubic Feet`)/1e3]
eia.gas.2018 = eia.gas.2018/(365.25/12) # convert to bcfd

DI.crude.2018 = tot.prod.by.fed.off.type[year(prod_date)==2018, sum(liq), by=prod_date][, mean(V1)]
DI.gas.2018 = tot.prod.by.fed.off.type[year(prod_date)==2018, sum(gas), by=prod_date][, mean(V1)]

# Model slightly slightly underpredicts oil and gas due to missing wells.
# For this reason, we do an adjustment based on historical ratio of EIA production and 
# in-sample production.
US.adj = c(liq = eia.crude.2018/DI.crude.2018,
           gas = eia.gas.2018/DI.gas.2018)
US.adj
